* ---------------------------------------------------------------------------
* Session setup.
* Close any log left open by an earlier run, empty the workspace, and open
* descriptives.log. The log is paused immediately so that the data
* preparation below is not recorded; it is switched back on only around the
* descriptive tables.
* ---------------------------------------------------------------------------
capture log close
clear all
set memory 5000m
set more off
log using descriptives.log, replace
log off

* ---------------------------------------------------------------------------
* Load the wide file of reconstructed male family lines with yearly earnings
* (created by "triplet-data-creation-20160706.do").
* ---------------------------------------------------------------------------
use D:\Data\WorkData\702092\paul\triplet\201607\earnwide-triplet-father-1935-1969-son-1955-1989
* Alternative when the file sits in the working directory:
* use earnwide-triplet-father-1935-1969-son-1955-1989

describe

* bob: position within the family line, copied from border.
* The code below treats 0 as the father, 1 as the first son, 2 as the second son.
generate bob = border

* Calendar year and month of birth taken from the date of birth.
generate yob = year(dob)
generate mob = month(dob)

tabulate yob bob

* Retain fathers, first sons and second sons only; third and later sons
* (and rows with missing bob) are discarded.
keep if bob <= 2


* ---------------------------------------------------------------------------
* Spread each member's birth year/month to every row of the family (pnrf),
* in preparation for the age-gap restrictions applied afterwards.
*   yobf  / mobf  : father      (bob == 0)
*   yobs1 / mobs1 : first son   (bob == 1)
*   yobs2 / mobs2 : second son  (bob == 2)
* When a family lacks the member in question the variables stay missing for
* the whole family.
* ---------------------------------------------------------------------------
local k = 0
foreach who in f s1 s2 {
	* Filled only on the member's own row at first.
	generate yob`who' = yob if bob == `k'
	generate mob`who' = mob if bob == `k'

	* Sorting on the year puts the single non-missing row first within the
	* family (missing sorts last); copy that row's values to the other rows.
	bysort pnrf (yob`who'): replace yob`who' = yob`who'[1] if yob`who' == .
	by pnrf: replace mob`who' = mob`who'[1] if mob`who' == .

	local ++k
}






* ---------------------------------------------------------------------------
* Family-level sample restrictions, with a yob-by-bob tabulation after each
* step to track what is left. Results are saved as fsearnwide_b.
* ---------------------------------------------------------------------------
tabulate yob bob

* Keep families whose first son was born 1959-1985; families with no first
* son (yobs1 missing) are removed as well.
* NOTE(review): an earlier note on this step quoted 1955 and 1982 and gave as
* rationale thin early cohorts and sons too young to be followed from age 25
* for five years; the cut-offs actually coded are 1959 and 1985 - confirm.
keep if inrange(yobs1, 1959, 1985)

* Father's age, in months, at the birth of the first son.
* Families below 216 months (18 years) are removed.
generate agedifffs1 = 12*(yobs1 - yobf) + (mobs1 - mobf)
drop if agedifffs1 < 216
tabulate yob bob

* Spacing between the brothers in months (missing when there is no second son).
generate agediffs1s2 = 12*(yobs2 - yobs1) + (mobs2 - mobs1)

* Spacing under 12 months: every row of the family is removed.
* NOTE(review): unlike the following restrictions this one is not limited to
* bob == 2, so father and first son go too - confirm that is intended.
drop if agediffs1s2 < 12

* Spacing over 144 months (12 years): only the second son is removed.
drop if bob == 2 & agediffs1s2 > 144 & agediffs1s2 < .

tabulate yob bob

* Second sons must be born 1962-1985 (a missing yob is also removed).
drop if bob == 2 & !inrange(yob, 1962, 1985)

tabulate yob bob

* Remove the helper variables built for the restrictions.
drop agediff* yobs* yobf mobs* mobf

save fsearnwide_b, replace

* ---------------------------------------------------------------------------
* Convert earnings from wide (earn1980 ... earn2014) to long (one row per
* person-year) by writing one slice per calendar year and stacking them.
* Output: earnings_longdescriptives.dta. The per-year scratch files
* earnings1980.dta ... earnings2014.dta are written to the working directory
* and erased again once appended.
* ---------------------------------------------------------------------------
use fsearnwide_b, clear

* Step 1: one file per year holding the identifiers and that year's earnings.
forvalues yr = 1980/2014 {
	preserve
	keep pnr pnrf yob mob bob earn`yr'
	generate year = `yr'
	rename earn`yr' earnings
	save earnings`yr', replace
	restore
}

* Step 2: start from the 1980 slice and append the later years in order,
* deleting each scratch file as soon as it has been read.
use earnings1980, clear
forvalues yr = 1981/2014 {
	append using earnings`yr'
	erase earnings`yr'.dta
}

save earnings_longdescriptives, replace
erase earnings1980.dta
	
* ---------------------------------------------------------------------------
* Three-year birth cohorts, ages, and row-level trimming.
* ---------------------------------------------------------------------------

* cohort = central year of the three-year window containing yob, for windows
* centred on 1936, 1939, ..., 1984 (i.e. yob 1935-1985). Any yob outside
* those windows keeps cohort equal to yob.
generate cohort = yob
forvalues mid = 1936(3)1984 {
	replace cohort = `mid' if inrange(yob, `mid' - 1, `mid' + 1)
}

* Actual age and cohort-based age in the earnings year.
generate age   = year - yob
generate ageco = year - cohort

* Keep person-years with cohort age 25-60 (missing ageco is removed too).
keep if inrange(ageco, 25, 60)

* EARNINGS TRIMMING
* Remove person-years with missing earnings, and with zero or negative earnings.
drop if earnings == . | earnings <= 0




* ---------------------------------------------------------------------------
* Household structure: keep complete father / first son / second son families.
* ---------------------------------------------------------------------------

* Lowest and highest bob still present in each family after trimming.
bysort pnrf (bob year): generate bob1 = bob[1]
by pnrf: generate bobN = bob[_N]

* Flag a row of a second son that directly follows a father row, i.e. the
* family has a second son but no first-son rows; then spread the flag to all
* rows of the family.
by pnrf: generate bobhole = bob == 2 & bob[_n-1] == 0
replace bobhole = 0 if missing(bobhole)
bysort pnrf (bobhole): replace bobhole = bobhole[_N]

* Father present, at least one son present, and no missing first son.
keep if bob1 == 0 & bobN > 0 & bobhole == 0

* Markers for one row per person (prime) and one row per family (primedyn).
capture drop prime*
bysort pnr: generate prime = _n == 1
bysort pnrf: generate primedyn = _n == 1

* Recompute the highest bob and keep only families that include a second son.
capture drop bobN
bysort pnrf (bob): generate bobN = bob[_N]
keep if bobN == 2

* Father's birth year (first row when ordered by bob); families whose father
* was born after 1961 are removed.
bysort pnrf (bob): generate yobF = yob[1]
drop if yobF > 1961

* ---------------------------------------------------------------------------
* Table 1, left panel: descriptives for the data currently in memory.
* The log is switched on only for this section, so these commands and their
* output are what end up in descriptives.log.
*   - "ta bob if prime" counts persons by bob (prime marks one row per pnr).
*   - "ta bob" counts person-year rows by bob.
*   - For 1990, 1995, 2000, 2005, 2010 and 2014: mean, standard deviation and
*     frequency of earnings (abbreviated "earn") and of cohort age (ageco),
*     by bob.
* These notes sit before "log on" so that they are not echoed into the log.
* ---------------------------------------------------------------------------
log on							// Table 1, Left Panel
ta bob if prime
ta bob
ta bob if year==1990, summ(earn)
ta bob if year==1990, summ(ageco)
ta bob if year==1995, summ(earn)
ta bob if year==1995, summ(ageco)
ta bob if year==2000, summ(earn)
ta bob if year==2000, summ(ageco)
ta bob if year==2005, summ(earn)
ta bob if year==2005, summ(ageco)
ta bob if year==2010, summ(earn)
ta bob if year==2010, summ(ageco)
ta bob if year==2014, summ(earn)
ta bob if year==2014, summ(ageco)

log off



* ---------------------------------------------------------------------------
* Table 1, right panel: the same set of tabulations as the left panel, run
* on the file earnings_long, which replaces the data in memory.
* NOTE(review): earnings_long is not created anywhere in this do-file (the
* long file saved here is earnings_longdescriptives). Presumably it is the
* sample produced by a separate do-file - confirm. It must already contain
* bob, prime, year, ageco and a variable that "earn" abbreviates uniquely.
* The log is closed at the end of this section.
* These notes sit before "log on" so that they are not echoed into the log.
* ---------------------------------------------------------------------------
log on 							// Table 1, Right Panel
use earnings_long, clear

ta bob if prime
ta bob
ta bob if year==1990, summ(earn)
ta bob if year==1990, summ(ageco)
ta bob if year==1995, summ(earn)
ta bob if year==1995, summ(ageco)
ta bob if year==2000, summ(earn)
ta bob if year==2000, summ(ageco)
ta bob if year==2005, summ(earn)
ta bob if year==2005, summ(ageco)
ta bob if year==2010, summ(earn)
ta bob if year==2010, summ(ageco)
ta bob if year==2014, summ(earn)
ta bob if year==2014, summ(ageco)



log close







